import pandas as pd
import numpy as np
import statistics #mathematical statistics
import matplotlib.pyplot as plt
import plotly.express as px #create entire figures
import seaborn as sns #data visuals
#html export
import plotly.io as pio
pio.renderers.default = 'notebook'
df_psy = pd.read_csv('psy_data_setEN.csv')
df_psy
| Person | age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 16 | Never | Hetero | Other | 1.0 | 7.0 | 7.0 | 7.0 | 7.0 | 5.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 |
| 1 | 2 | 16 | Never | New Types | Christian Other | 6.0 | 4.0 | 7.0 | 4.0 | 7.0 | 7.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 |
| 2 | 3 | 17 | Never | Homo | Christian | 2.0 | 2.0 | 2.0 | 6.0 | 5.0 | 5.0 | 3.0 | 3.0 | 4.0 | 3.0 | 4.0 |
| 3 | 4 | 13 | Never | New Types | Christian | 1.0 | 7.0 | 4.0 | 4.0 | 6.0 | 1.0 | 6.0 | 2.0 | 2.0 | 2.0 | 1.0 |
| 4 | 5 | 19 | Never | Hetero | Muslim | 2.0 | 3.0 | 6.0 | 5.0 | 5.0 | 6.0 | 3.0 | 2.0 | 3.0 | 4.0 | 4.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 39774 | 39772 | 21 | Never | New Types | Muslim | 4.0 | 5.0 | 7.0 | 6.0 | 4.0 | 7.0 | 4.0 | 3.0 | 3.0 | 4.0 | 4.0 |
| 39775 | 39773 | 48 | Married | Hetero | Christian Other | 6.0 | 7.0 | 5.0 | 3.0 | 6.0 | 1.0 | 5.0 | 2.0 | 2.0 | 1.0 | 2.0 |
| 39776 | 39774 | 20 | Never | Hetero | Protestant | 1.0 | 5.0 | 7.0 | 5.0 | 3.0 | 5.0 | 3.0 | 3.0 | 2.0 | 4.0 | 4.0 |
| 39777 | 39775 | 26 | Never | Hetero | Muslim | 6.0 | 3.0 | 5.0 | 3.0 | 5.0 | 5.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 |
| 39778 | 39775 | 26 | Never | Hetero | Muslim | 6.0 | 3.0 | 5.0 | 3.0 | 5.0 | 5.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 |
39779 rows × 16 columns
Data Wrangling involves:
#dropping person column
df_psy = df_psy.drop(df_psy.columns[0], axis =1)
df_psy
| age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16 | Never | Hetero | Other | 1.0 | 7.0 | 7.0 | 7.0 | 7.0 | 5.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 |
| 1 | 16 | Never | New Types | Christian Other | 6.0 | 4.0 | 7.0 | 4.0 | 7.0 | 7.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 |
| 2 | 17 | Never | Homo | Christian | 2.0 | 2.0 | 2.0 | 6.0 | 5.0 | 5.0 | 3.0 | 3.0 | 4.0 | 3.0 | 4.0 |
| 3 | 13 | Never | New Types | Christian | 1.0 | 7.0 | 4.0 | 4.0 | 6.0 | 1.0 | 6.0 | 2.0 | 2.0 | 2.0 | 1.0 |
| 4 | 19 | Never | Hetero | Muslim | 2.0 | 3.0 | 6.0 | 5.0 | 5.0 | 6.0 | 3.0 | 2.0 | 3.0 | 4.0 | 4.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 39774 | 21 | Never | New Types | Muslim | 4.0 | 5.0 | 7.0 | 6.0 | 4.0 | 7.0 | 4.0 | 3.0 | 3.0 | 4.0 | 4.0 |
| 39775 | 48 | Married | Hetero | Christian Other | 6.0 | 7.0 | 5.0 | 3.0 | 6.0 | 1.0 | 5.0 | 2.0 | 2.0 | 1.0 | 2.0 |
| 39776 | 20 | Never | Hetero | Protestant | 1.0 | 5.0 | 7.0 | 5.0 | 3.0 | 5.0 | 3.0 | 3.0 | 2.0 | 4.0 | 4.0 |
| 39777 | 26 | Never | Hetero | Muslim | 6.0 | 3.0 | 5.0 | 3.0 | 5.0 | 5.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 |
| 39778 | 26 | Never | Hetero | Muslim | 6.0 | 3.0 | 5.0 | 3.0 | 5.0 | 5.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 |
39779 rows × 15 columns
#duplicated values
duplicates = sum(df_psy.duplicated())
duplicates
16
#Number of rows on data
len(df_psy)
39779
#Percentage of duplicated values
duplicates/len(df_psy)*100
0.04022222780864275
<1% of data is duplicated hence safe to delete these duplicates
# duplicates
duplicates = df_psy[df_psy.duplicated]
duplicates
| age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5840 | 20 | Never | Hetero | Muslim | 4.0 | 7.0 | 1.0 | 5.0 | 6.0 | 1.0 | 6.0 | 1.0 | 1.0 | 1.0 | 1.0 |
| 5958 | 19 | Never | New Types | Muslim | 5.0 | 6.0 | 7.0 | 7.0 | 7.0 | 7.0 | 4.0 | 4.0 | 3.0 | 2.0 | 4.0 |
| 8778 | 20 | Never | New Types | Muslim | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 | 2.0 | 2.0 |
| 13331 | 18 | Never | Hetero | Muslim | 1.0 | 1.0 | 7.0 | 7.0 | 7.0 | 7.0 | 1.0 | 4.0 | 4.0 | 4.0 | 4.0 |
| 19915 | 23 | Never | Hetero | Muslim | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.0 | 2.0 |
| 23087 | 18 | Never | Hetero | Muslim | 1.0 | 1.0 | 7.0 | 7.0 | 7.0 | 7.0 | 1.0 | 4.0 | 4.0 | 4.0 | 4.0 |
| 23201 | 18 | Never | Hetero | Muslim | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.0 | 1.0 |
| 24848 | 22 | Never | New Types | Muslim | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
| 26074 | 28 | Never | Hetero | Muslim | 6.0 | 6.0 | 5.0 | 4.0 | 6.0 | 5.0 | 6.0 | 2.0 | 1.0 | 1.0 | 2.0 |
| 28382 | 18 | Never | New Types | Muslim | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 4.0 | 4.0 | 4.0 | 4.0 |
| 32458 | 23 | Never | Hetero | Muslim | 5.0 | 6.0 | 6.0 | 5.0 | 5.0 | 5.0 | 5.0 | 3.0 | 2.0 | 2.0 | 3.0 |
| 38016 | 21 | Never | New Types | Muslim | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 3.0 | 2.0 | 2.0 | 2.0 |
| 38951 | 19 | Never | Hetero | Muslim | 2.0 | 2.0 | 7.0 | 6.0 | 5.0 | 7.0 | 2.0 | 4.0 | 4.0 | 4.0 | 4.0 |
| 39148 | 18 | Never | New Types | Muslim | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 1.0 | 1.0 |
| 39753 | 15 | Never | Hetero | Jewish | 3.0 | 6.0 | 6.0 | 6.0 | 3.0 | 2.0 | 2.0 | 3.0 | 4.0 | 4.0 | 4.0 |
| 39778 | 26 | Never | Hetero | Muslim | 6.0 | 3.0 | 5.0 | 3.0 | 5.0 | 5.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 |
#dropping duplicates
df_psy = df_psy.drop_duplicates()
#Number of current duplicates
duplicates = sum(df_psy.duplicated())
duplicates
0
df_psy
| age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16 | Never | Hetero | Other | 1.0 | 7.0 | 7.0 | 7.0 | 7.0 | 5.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 |
| 1 | 16 | Never | New Types | Christian Other | 6.0 | 4.0 | 7.0 | 4.0 | 7.0 | 7.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 |
| 2 | 17 | Never | Homo | Christian | 2.0 | 2.0 | 2.0 | 6.0 | 5.0 | 5.0 | 3.0 | 3.0 | 4.0 | 3.0 | 4.0 |
| 3 | 13 | Never | New Types | Christian | 1.0 | 7.0 | 4.0 | 4.0 | 6.0 | 1.0 | 6.0 | 2.0 | 2.0 | 2.0 | 1.0 |
| 4 | 19 | Never | Hetero | Muslim | 2.0 | 3.0 | 6.0 | 5.0 | 5.0 | 6.0 | 3.0 | 2.0 | 3.0 | 4.0 | 4.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 39773 | 16 | Never | Asexual | Atheist | 2.0 | 3.0 | 5.0 | 5.0 | 5.0 | 3.0 | 3.0 | 2.0 | 3.0 | 3.0 | 4.0 |
| 39774 | 21 | Never | New Types | Muslim | 4.0 | 5.0 | 7.0 | 6.0 | 4.0 | 7.0 | 4.0 | 3.0 | 3.0 | 4.0 | 4.0 |
| 39775 | 48 | Married | Hetero | Christian Other | 6.0 | 7.0 | 5.0 | 3.0 | 6.0 | 1.0 | 5.0 | 2.0 | 2.0 | 1.0 | 2.0 |
| 39776 | 20 | Never | Hetero | Protestant | 1.0 | 5.0 | 7.0 | 5.0 | 3.0 | 5.0 | 3.0 | 3.0 | 2.0 | 4.0 | 4.0 |
| 39777 | 26 | Never | Hetero | Muslim | 6.0 | 3.0 | 5.0 | 3.0 | 5.0 | 5.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 |
39763 rows × 15 columns
There is a reduction in number of rows due to deletion of duplicated values
#info on the data
df_psy.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 39763 entries, 0 to 39777 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 age 39763 non-null int64 1 married 39763 non-null object 2 orientation 39762 non-null object 3 religion 39761 non-null object 4 Q1 39761 non-null float64 5 Q2 39758 non-null float64 6 Q3 39759 non-null float64 7 Q4 39761 non-null float64 8 Q5 39759 non-null float64 9 Q6 39759 non-null float64 10 Q7 39759 non-null float64 11 Q8 39758 non-null float64 12 Q9 39759 non-null float64 13 Q10 39760 non-null float64 14 Q11 39761 non-null float64 dtypes: float64(11), int64(1), object(3) memory usage: 4.9+ MB
null = df_psy[df_psy.isna().any(axis = 1)]#shows all null values
1. df_psy.isna() returns a DataFrame of the same shape as df_psy, where each element is either True if the corresponding element in df_psy is NaN or null, or False otherwise. This operation checks for missing values in each element of the DataFrame.
2. .any(axis=1) is used to check if there are any True values across each row. By specifying axis=1, it checks for the presence of at least one True value in each row. The result is a boolean Series with the same number of rows as df_psy, where each element indicates whether the corresponding row has any missing values.
3. df_psy[df_psy.isna().any(axis=1)] is indexing df_psy using the boolean Series from the previous step. It selects only those rows from df_psy where the corresponding value in the boolean Series is True, meaning that row has at least one missing value.
The result is assigned to null, which will be another DataFrame containing only the rows from df_psy that have missing values in any of their elements.
null
| age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 90 | 15 | Never | New Types | NaN | 1.0 | NaN | NaN | 6.0 | NaN | 6.0 | NaN | NaN | NaN | 4.0 | 4.0 |
| 137 | 20 | Never | Hetero | Atheist | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 153 | 24 | Never | Homo | Hindu | 5.0 | NaN | 4.0 | 5.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 225 | 19 | Never | NaN | NaN | 2.0 | NaN | NaN | 6.0 | NaN | NaN | 3.0 | NaN | 2.0 | NaN | 3.0 |
| 257 | 15 | Never | Homo | Jewish | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.0 | 2.0 |
#dropping null values
df_psy = df_psy.dropna()
df_psy
| age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16 | Never | Hetero | Other | 1.0 | 7.0 | 7.0 | 7.0 | 7.0 | 5.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 |
| 1 | 16 | Never | New Types | Christian Other | 6.0 | 4.0 | 7.0 | 4.0 | 7.0 | 7.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 |
| 2 | 17 | Never | Homo | Christian | 2.0 | 2.0 | 2.0 | 6.0 | 5.0 | 5.0 | 3.0 | 3.0 | 4.0 | 3.0 | 4.0 |
| 3 | 13 | Never | New Types | Christian | 1.0 | 7.0 | 4.0 | 4.0 | 6.0 | 1.0 | 6.0 | 2.0 | 2.0 | 2.0 | 1.0 |
| 4 | 19 | Never | Hetero | Muslim | 2.0 | 3.0 | 6.0 | 5.0 | 5.0 | 6.0 | 3.0 | 2.0 | 3.0 | 4.0 | 4.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 39773 | 16 | Never | Asexual | Atheist | 2.0 | 3.0 | 5.0 | 5.0 | 5.0 | 3.0 | 3.0 | 2.0 | 3.0 | 3.0 | 4.0 |
| 39774 | 21 | Never | New Types | Muslim | 4.0 | 5.0 | 7.0 | 6.0 | 4.0 | 7.0 | 4.0 | 3.0 | 3.0 | 4.0 | 4.0 |
| 39775 | 48 | Married | Hetero | Christian Other | 6.0 | 7.0 | 5.0 | 3.0 | 6.0 | 1.0 | 5.0 | 2.0 | 2.0 | 1.0 | 2.0 |
| 39776 | 20 | Never | Hetero | Protestant | 1.0 | 5.0 | 7.0 | 5.0 | 3.0 | 5.0 | 3.0 | 3.0 | 2.0 | 4.0 | 4.0 |
| 39777 | 26 | Never | Hetero | Muslim | 6.0 | 3.0 | 5.0 | 3.0 | 5.0 | 5.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 |
39758 rows × 15 columns
There is a reduction in number of rows due to deletion of null values from the Dataframe
OUTLIERS
Outliers are the extreme values within the dataset that deviates significantly from the rest of the (so-called normal)objects—either being much larger or significantly smaller.
Outliers can be detected using
#using boxplot to find outlier for Q8
var = df_psy['Q8']
plt.boxplot(var)
plt.show()
There are no outliers
px.box(var, points = 'all')# is using the Plotly Express library in Python to create a box plot with all data points displayed
Based on the box plot using Plotly:
num = df_psy.drop(['married', 'orientation', 'religion'], axis=1)
fig = px.box(num.melt(),y = "value", facet_col='variable', boxmode= 'overlay', color = 'variable')#plotting outliers for all variables
fig.update_yaxes(matches = None)
Outliers are in age, Q4, Q5 & Q11
#finding maximum age
max_age = df_psy['age'].max()
max_age
1998
This age is impractical
max_age = df_psy[df_psy['age']>100] #finding age outliers
max_age
| age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5342 | 223 | Never | Hetero | Muslim | 7.0 | 6.0 | 3.0 | 2.0 | 1.0 | 2.0 | 6.0 | 2.0 | 1.0 | 1.0 | 1.0 |
| 10233 | 1996 | Never | New Types | Muslim | 5.0 | 7.0 | 7.0 | 7.0 | 5.0 | 3.0 | 5.0 | 4.0 | 4.0 | 3.0 | 4.0 |
| 14238 | 117 | Never | Homo | Christian Other | 3.0 | 1.0 | 7.0 | 6.0 | 3.0 | 6.0 | 2.0 | 4.0 | 3.0 | 4.0 | 3.0 |
| 21385 | 1998 | Never | Asexual | Muslim | 1.0 | 1.0 | 6.0 | 7.0 | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | 2.0 | 2.0 |
| 24900 | 115 | Never | Homo | Christian | 5.0 | 3.0 | 6.0 | 6.0 | 5.0 | 6.0 | 2.0 | 3.0 | 2.0 | 1.0 | 2.0 |
| 30029 | 1993 | Married | Hetero | Muslim | 4.0 | 7.0 | 7.0 | 7.0 | 7.0 | 6.0 | 2.0 | 4.0 | 4.0 | 3.0 | 4.0 |
| 33732 | 1991 | Never | Hetero | Atheist | 2.0 | 7.0 | 7.0 | 5.0 | 6.0 | 6.0 | 2.0 | 3.0 | 3.0 | 3.0 | 4.0 |
df_psy = df_psy[df_psy['age']<100] #deletes the 7 rows which are age outliers
Q = df_psy[df_psy['Q4']>8] #finding outlier in Q4
Q
| age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 34616 | 21 | Never | Hetero | Atheist | 3.0 | 7.0 | 3.0 | 33.0 | 7.0 | 1.0 | 5.0 | 3.0 | 4.0 | 4.0 | 4.0 |
The outlier in Q4 is 33 as according to the Plotly Express box plot the min value is 1 and max value 7
#new rows for new dataframe
df_psy = df_psy[df_psy['Q4']<8]
df_psy = df_psy[df_psy['Q5']<8]
df_psy = df_psy[df_psy['Q4']<5]
#plotting numeric variables without outliers
num = df_psy.drop(['married', 'orientation', 'religion'], axis=1)
fig = px.box(num.melt(),y = "value", facet_col='variable', boxmode= 'overlay', color = 'variable')#plotting outliers for all variables
fig.update_yaxes(matches = None)
NON-NUMERIC VARIABLES
#Analyzing possible outliers in non-numeric variables making sure they are proper
df_psy.married.unique()
array(['Never', 'Married', 'Previously', 'Second'], dtype=object)
df_psy.orientation.unique()
array(['New Types', 'Bi', 'Hetero', 'Homo', 'Asexual'], dtype=object)
df_psy.religion.unique()
array(['Christian Other', 'Christian', 'Atheist', 'Protestant',
'Agnostic', 'Other', 'Jewish', 'Hindu', 'Buddhist', 'Muslim',
'Mormon', 'Sikh'], dtype=object)
#changing to a numerical variable using dictionary
matrimonial = {
'Never' : 1,
'Previously' : 2,
'Married' : 3,
'Second' : 4
}
df_psy['MarriedN'] = df_psy['married'].map(matrimonial)#builds a new column and maps to the dictionary
sexuality = {
'New Types' : 1,
'Bi' : 2,
'Hetero' : 3,
'Homo' : 4,
'Asexual' : 5
}
df_psy['OrientationN'] = df_psy['orientation'].map(sexuality)#builds a new column and maps to the dictionary
religia = {
'Christian Other' : 1,
'Christian' : 2,
'Atheist' : 3,
'Protestant' : 4,
'Agnostic' : 5,
'Other' : 6,
'Jewish' : 7,
'Hindu' : 8,
'Buddhist' : 9,
'Muslim': 10,
'Mormon' : 11,
'Sikh' : 12
}
df_psy['ReligiaN'] = df_psy['religion'].map(religia)#builds a new column and maps to the dictionary
df_psy
| age | married | orientation | religion | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 | Q8 | Q9 | Q10 | Q11 | MarriedN | OrientationN | ReligiaN | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 16 | Never | New Types | Christian Other | 6.0 | 4.0 | 7.0 | 4.0 | 7.0 | 7.0 | 1.0 | 4.0 | 2.0 | 4.0 | 4.0 | 1 | 1 | 1 |
| 3 | 13 | Never | New Types | Christian | 1.0 | 7.0 | 4.0 | 4.0 | 6.0 | 1.0 | 6.0 | 2.0 | 2.0 | 2.0 | 1.0 | 1 | 1 | 2 |
| 6 | 17 | Never | Bi | Christian Other | 2.0 | 6.0 | 5.0 | 2.0 | 1.0 | 3.0 | 5.0 | 1.0 | 2.0 | 3.0 | 2.0 | 1 | 2 | 1 |
| 7 | 29 | Never | Bi | Atheist | 7.0 | 4.0 | 5.0 | 2.0 | 2.0 | 3.0 | 5.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1 | 2 | 3 |
| 10 | 15 | Never | Hetero | Protestant | 5.0 | 6.0 | 6.0 | 4.0 | 4.0 | 7.0 | 5.0 | 3.0 | 2.0 | 3.0 | 2.0 | 1 | 3 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 39765 | 22 | Never | Hetero | Muslim | 1.0 | 7.0 | 2.0 | 4.0 | 6.0 | 1.0 | 4.0 | 1.0 | 1.0 | 2.0 | 3.0 | 1 | 3 | 10 |
| 39771 | 27 | Never | Hetero | Christian | 6.0 | 6.0 | 7.0 | 4.0 | 6.0 | 5.0 | 5.0 | 2.0 | 1.0 | 2.0 | 2.0 | 1 | 3 | 2 |
| 39772 | 16 | Never | Hetero | Agnostic | 6.0 | 4.0 | 5.0 | 3.0 | 5.0 | 2.0 | 5.0 | 3.0 | 2.0 | 2.0 | 4.0 | 1 | 3 | 5 |
| 39775 | 48 | Married | Hetero | Christian Other | 6.0 | 7.0 | 5.0 | 3.0 | 6.0 | 1.0 | 5.0 | 2.0 | 2.0 | 1.0 | 2.0 | 3 | 3 | 1 |
| 39777 | 26 | Never | Hetero | Muslim | 6.0 | 3.0 | 5.0 | 3.0 | 5.0 | 5.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 | 1 | 3 | 10 |
14313 rows × 18 columns